% File src/library/stats/man/dendrogram.Rd
% Part of the R package, https://www.R-project.org
% Copyright 1995-2020 R Core Team
% Copyright 2002-2015 The R Foundation
% Distributed under GPL 2 or later

\name{dendrogram}
\title{General Tree Structures}
\alias{dendrogram}% the class
\alias{as.dendrogram}
\alias{as.dendrogram.dendrogram}
\alias{as.dendrogram.hclust}
\alias{as.hclust.dendrogram}
\alias{cut.dendrogram}
\alias{[[.dendrogram}
\alias{merge.dendrogram}
\alias{nobs.dendrogram}
\alias{plot.dendrogram}
\alias{print.dendrogram}
\alias{rev.dendrogram}
\alias{str.dendrogram}
\alias{is.leaf}
\description{
  Class \code{"dendrogram"} provides general functions for handling
  tree-like structures.  It is intended as a replacement for similar
  functions in hierarchical clustering and classification/regression
  trees, such that all of these can use the same engine for plotting or
  cutting trees.
}
\usage{
as.dendrogram(object, \dots)
\method{as.dendrogram}{hclust}(object, hang = -1, check = TRUE, \dots)

\method{as.hclust}{dendrogram}(x, \dots)

\method{plot}{dendrogram}(x, type = c("rectangle", "triangle"),
      center = FALSE,
      edge.root = is.leaf(x) || !is.null(attr(x,"edgetext")),
      nodePar = NULL, edgePar = list(),
      leaflab = c("perpendicular", "textlike", "none"),
      dLeaf = NULL, xlab = "", ylab = "", xaxt = "n", yaxt = "s",
      horiz = FALSE, frame.plot = FALSE, xlim, ylim, \dots)

\method{cut}{dendrogram}(x, h, \dots)

\method{merge}{dendrogram}(x, y, \dots, height,
      adjust = c("auto", "add.max", "none"))

\method{nobs}{dendrogram}(object, \dots)

\method{print}{dendrogram}(x, digits, \dots)

\method{rev}{dendrogram}(x)

\method{str}{dendrogram}(object, max.level = NA, digits.d = 3,
    give.attr = FALSE, wid = getOption("width"),
    nest.lev = 0, indent.str = "",
    last.str = getOption("str.dendrogram.last"), stem = "--",
    \dots)

is.leaf(object)
}
\arguments{
  \item{object}{any \R object that can be made into one of class
    \code{"dendrogram"}.}
  \item{x, y}{object(s) of class \code{"dendrogram"}.}
  \item{hang}{numeric scalar indicating how the \emph{height} of leaves
    should be computed from the heights of their parents; see
    \code{\link{plot.hclust}}.}
  \item{check}{logical indicating if \code{object} should be checked for
    validity.  This check is not necessary when \code{object} is known to
    be valid, such as when it is the direct result of \code{hclust()}.  The
    default is \code{check = TRUE}, e.g.\sspace{}for protecting against memory
    explosion with invalid inputs.}
  \item{type}{type of plot.}
  \item{center}{logical; if \code{TRUE}, nodes are plotted centered with
    respect to the leaves in the branch.  Otherwise (default), plot them
    in the middle of all direct child nodes.}
  \item{edge.root}{logical; if true, draw an edge to the root node.}
  \item{nodePar}{a \code{list} of plotting parameters to use for the
    nodes (see \code{\link{points}}) or \code{NULL} by default which
    does not draw symbols at the nodes.  The list may contain components
    named \code{pch}, \code{cex}, \code{col}, \code{xpd},
    and/or \code{bg} each of
    which can have length two for specifying separate attributes for
    \emph{inner} nodes and \emph{leaves}.  Note that the default of
    \code{pch} is \code{1:2}, so you may want to use \code{pch = NA} if
    you specify \code{nodePar}.}
  \item{edgePar}{a \code{list} of plotting parameters to use for the
    edge \code{\link{segments}} and labels (if there's an
    \code{edgetext}).  The list may contain components
    named \code{col}, \code{lty} and \code{lwd} (for the segments),
    \code{p.col}, \code{p.lwd}, and \code{p.lty} (for the
    \code{\link{polygon}} around the text) and \code{t.col} for the text
    color.  As with \code{nodePar}, each can have length two for
    differentiating leaves and inner nodes.
  }
  \item{leaflab}{a string specifying how leaves are labeled.  The
    default \code{"perpendicular"} writes text vertically (by default).\cr
    \code{"textlike"} writes text horizontally (in a rectangle), and \cr
    \code{"none"} suppresses leaf labels.}
  \item{dLeaf}{a number specifying the \bold{d}istance in user
    coordinates between the tip of a leaf and its label.  If \code{NULL}
    as per default, 3/4 of a letter width or height is used.}
  \item{horiz}{logical indicating if the dendrogram should be drawn
    \emph{horizontally} or not.}
  \item{frame.plot}{logical indicating if a box around the plot should
    be drawn, see \code{\link{plot.default}}.}
  \item{h}{height at which the tree is cut.}
  \item{height}{height at which the two dendrograms should be merged.  If not
    specified (or \code{NULL}), the default is ten percent larger than
    the (larger of the) two component heights.}
  \item{adjust}{a string determining if the leaf values should be
    adjusted.  The default, \code{"auto"}, checks if the (first) two
    dendrograms both start at \code{1}; if they do, \code{"add.max"} is
    chosen, which adds the maximum of the previous dendrogram leaf
    values to each leaf of the \dQuote{next} dendrogram.  Specifying
    \code{adjust} to another value skips the check and hence is a tad
    more efficient.}
  \item{xlim, ylim}{optional x- and y-limits of the plot, passed to
    \code{\link{plot.default}}.  The defaults for these show the full
    dendrogram.}
  \item{\dots, xlab, ylab, xaxt, yaxt}{graphical parameters, or arguments for
    other methods.}
  \item{digits}{integer specifying the precision for printing, see
    \code{\link{print.default}}.}
  \item{max.level, digits.d, give.attr, wid, nest.lev, indent.str}{arguments
    to \code{str}, see \code{\link{str.default}()}.  Note that
    \code{give.attr = FALSE} still shows \code{height} and \code{members}
    attributes for each node.}
  \item{last.str, stem}{strings used for \code{str()} specifying how the
    last branch (at each level) should start and the \emph{stem}
    to use for each dendrogram branch.  In some environments, using
    \code{last.str = "'"} will provide much nicer looking output than
    the historical default \code{last.str = "`"}.}
}
\details{
  The dendrogram is directly represented as a nested list where each
  component corresponds to a branch of the tree.  Hence, the first
  branch of tree \code{z} is \code{z[[1]]}, the second branch of the
  corresponding subtree is \code{z[[1]][[2]]}, or shorter
  \code{z[[c(1,2)]]}, etc.  Each node of the tree
  carries some information needed for efficient plotting or cutting as
  attributes, of which only \code{members}, \code{height} and
  \code{leaf} for leaves are compulsory:
  \describe{
    \item{\code{members}}{total number of leaves in the branch}
    \item{\code{height}}{numeric non-negative height at which the node
      is plotted.}
    \item{\code{midpoint}}{numeric horizontal distance of the node from
      the left border (the leftmost leaf) of the branch (unit 1 between
      all leaves).  This is used for \code{plot(*, center = FALSE)}.}
    \item{\code{label}}{character; the label of the node}
    \item{\code{x.member}}{for \code{cut()$upper},
      the number of \emph{former} members; more generally a substitute
      for the \code{members} component used for \sQuote{horizontal}
      (when \code{horiz = FALSE}, else \sQuote{vertical}) alignment.}
    \item{\code{edgetext}}{character; the label for the edge leading to
      the node}
    \item{\code{nodePar}}{a named list (of length-1 components)
      specifying node-specific attributes for \code{\link{points}}
      plotting, see the \code{nodePar} argument above.}
    \item{\code{edgePar}}{a named list (of length-1 components)
      specifying attributes for \code{\link{segments}} plotting of the
      edge leading to the node, and drawing of the \code{edgetext} if
      available, see the \code{edgePar} argument above.}
    \item{\code{leaf}}{logical, if \code{TRUE}, the node is a leaf of
      the tree.}%  This will often be a \code{\link{character}} which can
    %      be used for plotting instead of the \code{text} attribute.}
  }

  \code{cut.dendrogram()} returns a list with components \code{$upper}
  and \code{$lower}: the former is a truncated version of the original
  tree, also of class \code{dendrogram}; the latter is a list with the
  branches obtained from cutting the tree, each a \code{dendrogram}.

  There are \code{\link{[[}}, \code{\link{print}}, and \code{\link{str}}
  methods for \code{"dendrogram"} objects where the first one
  (extraction) ensures that selecting sub-branches keeps the class,
  i.e., returns a dendrogram even if only a leaf.
  On the other hand, \code{\link{[}} (\emph{single} bracket) extraction
  returns the underlying list structure.%, useful, e.g., for inspection.

  Objects of class \code{"hclust"} can be converted to class
  \code{"dendrogram"} using method \code{as.dendrogram()}, and since
  \R 2.13.0, there is also an \code{\link{as.hclust}()} method as an inverse.

  \code{rev.dendrogram} simply returns the dendrogram \code{x} with
  reversed nodes, see also \code{\link{reorder.dendrogram}}.

  The \code{\link{merge}(x, y, ...)} method merges two or more
  dendrograms into a new one which has \code{x} and \code{y} (and
  optional further arguments) as branches.  Note that before \R 3.1.2,
  \code{adjust = "none"} was used implicitly, which is invalid when,
  e.g., the dendrograms are from \code{as.dendrogram(hclust(..))}.

  \code{\link{nobs}(object)} returns the total number of leaves (the
  \code{members} attribute, see above).

  \code{is.leaf(object)} returns a logical indicating if \code{object} is a
  leaf (the simplest dendrogram).

  \code{plotNode()} and \code{plotNodeLimit()} are helper functions.
}
\note{
  \describe{
    \item{\code{plot()}:}{When using \code{type = "triangle"},
      \code{center = TRUE} often looks better.}
    \item{\code{str(d)}:}{If you really want to see the \emph{internal}
      structure, use \code{str(unclass(d))} instead.}
  }
}
\section{Warning}{
  Some operations on dendrograms such as \code{merge()} make use of
  recursion.  For deep trees it may be necessary to increase
  \code{\link{options}("expressions")}: if you do, you are likely to need
  to set the C stack size (\code{\link{Cstack_info}()[["size"]]}) larger
  than the default where possible.
}
\seealso{
  \code{\link{dendrapply}} for applying a function to \emph{each} node.
  \code{\link{order.dendrogram}} and \code{\link{reorder.dendrogram}};
  further, the \code{\link{labels}} method.
}
\examples{
require(graphics); require(utils)

## cluster the USArrests data and convert the hclust result to a dendrogram
hc <- hclust(dist(USArrests), "ave")
(dend1 <- as.dendrogram(hc)) # "print()" method
str(dend1)          # "str()" method
str(dend1, max.level = 2, last.str =  "'") # only the first two sub-levels
oo <- options(str.dendrogram.last = "\\\\") # yet another possibility
str(dend1, max.level = 2) # same levels; last.str now comes from the option
options(oo)  # .. restoring the previous option value

## 2 x 2 plot layout; op keeps the previous par() settings for par(op) below
op <- par(mfrow =  c(2,2), mar = c(5,2,1,4))
plot(dend1)
## "triangle" type and show inner nodes:
plot(dend1, nodePar = list(pch = c(1,NA), cex = 0.8, lab.cex = 0.8),
      type = "t", center = TRUE)
## length-two edgePar components, plus an edge drawn to the root node:
plot(dend1, edgePar = list(col = 1:2, lty = 2:3),
     dLeaf = 1, edge.root = TRUE)
## horizontal layout with length-two nodePar components:
plot(dend1, nodePar = list(pch = 2:1, cex = .4*2:1, col = 2:3),
     horiz = TRUE)

## simple test for as.hclust() as the inverse of as.dendrogram():
stopifnot(identical(as.hclust(dend1)[1:4], hc[1:4]))

## cut() at height 70 gives a list with components upper and lower:
dend2 <- cut(dend1, h = 70)
## leaves are wrong horizontally in R 4.0 and earlier:
plot(dend2$upper)
plot(dend2$upper, nodePar = list(pch = c(1,7), col = 2:1))
##  dend2$lower is *NOT* a dendrogram, but a list of .. :
plot(dend2$lower[[3]], nodePar = list(col = 4), horiz = TRUE, type = "tr")
## "inner" and "leaf" edges in different type & color :
plot(dend2$lower[[2]], nodePar = list(col = 1),   # non empty list
     edgePar = list(lty = 1:2, col = 2:1), edge.root = TRUE)
par(op)
## pick a sub-branch by nested [[ ]] extraction; the single [[ ]] call with
## an index vector checked below gives the identical result
d3 <- dend2$lower[[2]][[2]][[1]]
stopifnot(identical(d3, dend2$lower[[2]][[c(2,1)]]))
str(d3, last.str = "'")

## to peek at the inner structure "if you must", use '[..]' indexing :
str(d3[2][[1]]) ## or the full
str(d3[])

## merge() to join two dendrograms:
(d13 <- merge(dend2$lower[[1]], dend2$lower[[3]]))
## merge() all parts back, two at a time via Reduce()
## (using default 'height' instead of original one):
den.1 <- Reduce(merge, dend2$lower)
## or merge() all four parts at same height --> 4 branches (!)
d. <- merge(dend2$lower[[1]], dend2$lower[[2]], dend2$lower[[3]],
            dend2$lower[[4]])
## (with a warning) or the same using  do.call :
stopifnot(identical(d., do.call(merge, dend2$lower)))
plot(d., main = "merge(d1, d2, d3, d4)  |->  dendrogram with a 4-split")

## "Zoom" in to the first dendrogram by restricting xlim and ylim :
plot(dend1, xlim = c(1,20), ylim = c(1,50))

## node settings kept in nP so that several plot() calls can share them;
## the length-two components distinguish inner nodes and leaves
nP <- list(col = 3:2, cex = c(2.0, 0.75), pch =  21:22,
           bg =  c("light blue", "pink"),
           lab.cex = 0.75, lab.col = "tomato")
## horizontal plot of d3 with gray edges of line width 2
plot(d3, nodePar= nP, edgePar = list(col = "gray", lwd = 2), horiz = TRUE)
%% now add some "edgetext" :
addE <- function(n) {
  ## leaves pass through unchanged, only inner nodes get decorated
  if (is.leaf(n)) return(n)
  ## color of the polygon drawn around the edge label
  attr(n, "edgePar") <- list(p.col = "plum")
  ## edge label text: number of leaves in the branch below this node
  attr(n, "edgetext") <- paste(attr(n, "members"), "members")
  n
}
## apply addE to each node of d3, then plot the annotated tree
d3e <- dendrapply(d3, addE)
plot(d3e, nodePar =  nP)
## same plot, with the leaf labels written horizontally (in a rectangle)
plot(d3e, nodePar =  nP, leaflab = "textlike")

%% BUG:  edge labeling *and* leaflab = "textlike" both fail with  horiz = TRUE:
%% BUG plot(d3e, nodePar = nP, leaflab = "textlike", horiz = TRUE)
}
\keyword{multivariate}
\keyword{tree}% FIXME: want as.dendrogram.tree() etc!
\keyword{hplot}% only for plot.()
